library(leaflet)
library(readxl)
library(dplyr)
library(reticulate)
# Load the listings workbook into `maindata`.
# The path is anchored at the home directory ("~") so the read no longer
# depends on the session's working directory; the bare relative path failed
# with "`path` does not exist".
maindata <- read_excel(
  "~/Documents/Spring2021/DataVisualization/Project/maindata.xlsx"
)
Error: `path` does not exist: ‘Documents/Spring2021/DataVisualization/Project/maindata.xlsx’

Let's look at how many unique values each column has.

# Number of distinct values in every column of `maindata` (NA counts as a
# value). vapply() pins the result to exactly one integer per column, so the
# return type cannot silently change the way sapply()'s can.
vapply(maindata, function(column) length(unique(column)), integer(1))
                    id              log_price          property_type              room_type              amenities 
                 74111                    767                     35                      3                  67122 
          accommodates              bathrooms               bed_type    cancellation_policy           cleaning_fee 
                    16                     18                      5                      5                      2 
                  city            Description           first_review   host_has_profile_pic host_identity_verified 
                     6                  73469                   2555                      3                      3 
    host_response_rate             host_since       instant_bookable            last_review                    lat 
                    81                   3088                      2                   1372                  74058 
                  long                   name          neighbourhood      number_of_reviews   review_scores_rating 
                 73973                  73331                    620                    371                     55 
         thumbnail_url                zipcode               bedrooms                   beds 
                 65884                    669                     12                     19 
library(ggplot2)

# Boxplot of log_price for each cancellation policy.
ggplot(maindata, aes(x = cancellation_policy, y = log_price)) +
  geom_boxplot() +
  ggtitle("Boxplot for log_price vs cancellation policy")

Do some hypothesis testing.

First, let us plot the different properties, using the latitude and longitude information given in our dataset.

# Overview map: one red circle marker per listing, positioned by the `long`
# and `lat` columns. Reads `maindata` directly because `tmp` has not been
# assigned yet at this point in the script.
m <- leaflet(maindata) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(~long, ~lat, color = "red", stroke = FALSE)
m

From the map above, we can see that the properties listed in our dataset are from six different locations: Los Angeles, New York, DC, Boston, Chicago, and San Francisco.

log_price for the different cities.

NYC


# Keep only the NYC listings.
rows <- maindata$city == "NYC"
tmp <- maindata[rows, ]

# Colour bins for log_price. The interior cut points stay at 3.5, 5 and 6.5;
# the outer limits default to 2 and 8 but widen to the observed range, so no
# price falls outside the scale (which previously triggered the "outside the
# color scale" warning and drew those markers with the transparent NA colour).
price_range <- range(tmp$log_price, na.rm = TRUE)
mybins <- c(
  min(2, floor(price_range[1])),
  3.5, 5, 6.5,
  max(8, ceiling(price_range[2]))
)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# One circle per listing, sized and filled by log_price, plus a legend.
m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    ~long, ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette, values = ~log_price, opacity = 0.9,
    title = "Log_price", position = "bottomright"
  )
Some values were outside the color scale and will be treated as NA
m

Talk about which parts of NYC have the highest log_price and which have the lowest.

Boston.


# Keep only the Boston listings.
rows <- maindata$city == "Boston"
tmp <- maindata[rows, ]

# Fixed log_price bins: 2, 3.5, 5, 6.5, 8.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map layer by layer: base tiles, provider tiles, one circle per
# listing (sized and filled by log_price), then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m

DC


# Keep only the DC listings.
rows <- maindata$city == "DC"
tmp <- maindata[rows, ]

# Fixed log_price bins and the matching binned colour palette.
mybins <- seq(2, 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# One circle per listing, sized and filled by log_price, plus a legend.
m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    ~long, ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m

San Francisco.


# Keep only the San Francisco ("SF") listings.
rows <- maindata$city == "SF"
tmp <- maindata[rows, ]

# Fixed log_price bins: 2, 3.5, 5, 6.5, 8.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map layer by layer: base tiles, provider tiles, one circle per
# listing (sized and filled by log_price), then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m

LA


# Keep only the LA listings.
rows <- maindata$city == "LA"
tmp <- maindata[rows, ]

# Fixed log_price bins and the matching binned colour palette.
mybins <- seq(2, 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# One circle per listing, sized and filled by log_price, plus a legend.
m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    ~long, ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette,
    values = ~log_price,
    opacity = 0.9,
    title = "Log_price",
    position = "bottomright"
  )
m

We can see that there are high-priced properties along the edge.

Chicago


# Keep only the Chicago listings.
rows <- maindata$city == "Chicago"
tmp <- maindata[rows, ]

# Fixed log_price bins: 2, 3.5, 5, 6.5, 8.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map layer by layer: base tiles, provider tiles, one circle per
# listing (sized and filled by log_price), then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m

Types of property

# Keep only the Chicago listings.
rows <- maindata$city == "Chicago"
tmp <- maindata[rows, ]

# property_type is categorical, so it needs a factor palette: colorBin()
# cuts numeric values into bins and fails on this column with
# "'x' must be numeric". colorFactor() maps each distinct property type to
# its own colour instead.
mypalette <- colorFactor(
  palette = "YlOrBr",
  domain = tmp$property_type,
  na.color = "transparent"
)

# One circle per listing: radius from log_price, fill colour from
# property_type, with a legend listing the property types.
m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    ~long, ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(property_type),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette, values = ~property_type, opacity = 0.9,
    title = "Property type", position = "bottomright"
  )
m
Error in cut.default(x, binsToUse, labels = FALSE, include.lowest = TRUE,  : 
  'x' must be numeric
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KGxlYWZsZXQpCmxpYnJhcnkocmVhZHhsKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJldGljdWxhdGUpCmBgYAoKCmBgYHtyfQptYWluZGF0YSA8LSByZWFkX2V4Y2VsKCJ+L0RvY3VtZW50cy9TcHJpbmcyMDIxL0RhdGFWaXN1YWxpemF0aW9uL1Byb2plY3QvbWFpbmRhdGEueGxzeCIpCnRtcD1tYWluZGF0YQpgYGAKCkxldCBsb29rIGF0IGhvdyBtYW55IHVuaXF1ZSB2YWx1ZXMgZWFjaCBjb2x1bW4gaGFzCgpgYGB7cn0Kc2FwcGx5KG1haW5kYXRhLCBmdW5jdGlvbih4KSBsZW5ndGgodW5pcXVlKHgpKSkKYGBgCgpgYGB7cn0KbWFpbmRhdGEgJT4lIGdyb3VwX2J5KGNhbmNlbGxhdGlvbl9wb2xpY3kpICU+JSBzZWxlY3QoY2FuY2VsbGF0aW9uX3BvbGljeSklPiUgdW5pcXVlKCkKYGBgCmBgYHtyfQpjb3VudChtYWluZGF0YSwgY2FuY2VsbGF0aW9uX3BvbGljeSkgJT4lIG11dGF0ZShyZWxhdGl2ZV9mcmVxPShuL3N1bShuKSkpCmBgYAoKCmBgYHtyfQptYWluZGF0YSAlPiUgZ3JvdXBfYnkoYmVkX3R5cGUpICU+JSBzZWxlY3QoYmVkX3R5cGUpJT4lIHVuaXF1ZSgpCmBgYAoKCmBgYHtyfQptYWluZGF0YSAlPiUgZmlsdGVyKGNpdHk9PSJOWUMiKSAlPiUgc2VsZWN0KGJlZF90eXBlKSAlPiUgY291bnQoYmVkX3R5cGUpICU+JSBtdXRhdGUocmVsYXRpdmVfZnJlcT0obi9zdW0obikpKQpgYGAKCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQptYWluZGF0YSU+JSBnZ3Bsb3QoIGFlcyh4PWNhbmNlbGxhdGlvbl9wb2xpY3ksIHk9bG9nX3ByaWNlKSkrIGdlb21fYm94cGxvdCgpKyBnZ3RpdGxlKCAiQm94cGxvdCBmb3IgbG9nX3ByaWNlIHZzIGNhbmNlbGxhdGlvbiBwb2xpY3kiKQpgYGAKCmBgYHtyfQpnZ3Bsb3QobWFpbmRhdGEsIGFlcyh4PWJlZF90eXBlLCB5PWxvZ19wcmljZSkpKyBnZW9tX2JveHBsb3QoKSsgZ2d0aXRsZSggIkJveHBsb3QgZm9yIGxvZ19wcmljZSB2cyBiZWQgdHlwZSIpCmBgYAoKYGBge3J9CmdncGxvdChtYWluZGF0YSwgYWVzKHg9Y2xlYW5pbmdfZmVlLCB5PWxvZ19wcmljZSkpKyBnZW9tX2JveHBsb3QoKSsgZ2d0aXRsZSggIkJveHBsb3QgZm9yIGxvZ19wcmljZSB2cyBjbGVhbmluZyBmZWUiKQpgYGAKRG8gc29tZSBoeXBvdGhlc2lzIHRlc3RpbmcuCgpgYGB7cn0KdmNkOjptb3NhaWMofmNsZWFuaW5nX2ZlZStpbnN0YW50X2Jvb2thYmxlLCBkYXRhPW1haW5kYXRhLCBzaGFkZT1UUlVFKQpgYGAKCgpGaXJzdCwgbGV0IHVzIHBsb3QgdGhlIGRpZmZlcmVudCBwcm9wZXJ0aWVzLCB1c2luZyB0aGUgbGF0aXR1ZGUgYW5kIGxvbmdpdHVkZSBpbmZvcm1hdGlvbiBnaXZlbiBpbiBvdXIgZGF0YXNldC4gCmBgYHtyfQptIDwtIGxlYWZsZXQodG1wKSAlPiUKYWRkVGlsZXMoKSAlPiUKYWRkUHJvdmlkZXJUaWxlcygiT3BlblN0cmVldE1hcC5CWkgiKSAlPiUKYWRkQ2lyY2xlTWFya2Vycyh+bG9uZywgfmxh
dCwgY29sb3IgPSAicmVkIiwKc3Ryb2tlPUZBTFNFKQptCmBgYApGcm9tIHRoZSBtYXAgYWJvdmUsIHdlIGNhbiBzZWUgdGhhdCB0aGUgcHJvcGVydGllcyBsaXN0ZWQgaW4gb3VyIGRhdHNldCBhcmUgZnJvbSBzaXggZGlmZmVyZW50IGxvY2F0aW9uczogTG9zIEFuZ2VsZXMsIE5ldyBZb3JrLCBEQywgQm9zdG9uLCBDaGljYWdvLCBTYW4gRnJhbnNpY28gZXRjLiAKCgojIyBsb2dfcHJpY2UgZm9yIHRoIGRpZmZlcmVudCBjaXRpZXMuIAoKCiMjIyBOWUMgCgpgYGB7cn0KCnJvd3MgPSAobWFpbmRhdGEkY2l0eT09ICJOWUMiKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKVGFsayBhYm91dCB3aGljaCBwYXJ0IG9mIE5ZQyBoYXMgaGlnaGVyIGxvZ19wcmljZSBhbmQgd2hpY2ggcGFydCBoYXMgbG93ZXN0LiAKCiMjIyBCb3N0b24uCgpgYGB7cn0KCnJvd3MgPSAobWFpbmRhdGEkY2l0eT09ICJCb3N0b24iKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKCgojIyMgREMKCmBgYHtyfQoKcm93cyA9IChtYWluZGF0YSRjaXR5PT0gIkRDIikgCnRtcCA9IG1haW5kYXRhW3Jvd3MsIF0KCm15YmlucyA8LSBzZXEoMiwgOCwgYnk9MS41
KQpteXBhbGV0dGUgPC0gY29sb3JCaW4oIHBhbGV0dGU9IllsT3JCciIsCmRvbWFpbj10bXAkbG9nX3ByaWNlLApuYS5jb2xvcj0idHJhbnNwYXJlbnQiLApiaW5zPW15YmlucykKCm0gPC0gbGVhZmxldCh0bXApICU+JQphZGRUaWxlcygpICU+JQphZGRQcm92aWRlclRpbGVzKCJPcGVuU3RyZWV0TWFwLkJaSCIpICU+JQphZGRDaXJjbGVNYXJrZXJzKH5sb25nLCB+bGF0LHJhZGl1cyA9IH5sb2dfcHJpY2UsCmZpbGxDb2xvciA9IH5teXBhbGV0dGUobG9nX3ByaWNlKSwKZmlsbE9wYWNpdHkgPSAwLjUsCmNvbG9yID0gIndoaXRlIixzdHJva2U9RkFMU0UKKSU+JQphZGRMZWdlbmQoIHBhbD1teXBhbGV0dGUsIHZhbHVlcz1+bG9nX3ByaWNlLCBvcGFjaXR5PTAuOSwKdGl0bGUgPSAiTG9nX3ByaWNlIiwgcG9zaXRpb24gPSAiYm90dG9tcmlnaHQiICkKbQpgYGAKCgojIyMgIFNhbiBGcmFuY2lzY28uIAoKYGBge3J9Cgpyb3dzID0gKG1haW5kYXRhJGNpdHk9PSAiU0YiKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKCiMjIyBMQQoKYGBge3J9Cgpyb3dzID0gKG1haW5kYXRhJGNpdHk9PSAiTEEiKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIg
KQptCmBgYAoKCldlIGNhbiBzZWUgdGhhdCBhbG9uZyB0aGUgZWRnZSB0aGVyZSBhcmUgcHJvcGVydGllcyB0aGF0IGFyZSBoaWdoIHByaWNlZC4gCgoKIyMjIENoaWNhZ28KCmBgYHtyfQoKcm93cyA9IChtYWluZGF0YSRjaXR5PT0gIkNoaWNhZ28iKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKIyMgVHlwZXMgb2YgcHJvcGVydHkgCgpgYGB7cn0Kcm93cyA9IChtYWluZGF0YSRjaXR5PT0gIkNoaWNhZ28iKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRwcm9wZXJ0eV90eXBlLApuYS5jb2xvcj0idHJhbnNwYXJlbnQiLApiaW5zPW15YmlucykKCm0gPC0gbGVhZmxldCh0bXApICU+JQphZGRUaWxlcygpICU+JQphZGRQcm92aWRlclRpbGVzKCJPcGVuU3RyZWV0TWFwLkJaSCIpICU+JQphZGRDaXJjbGVNYXJrZXJzKH5sb25nLCB+bGF0LHJhZGl1cyA9IH5sb2dfcHJpY2UsCmZpbGxDb2xvciA9IH5teXBhbGV0dGUocHJvcGVydHlfdHlwZSksCmZpbGxPcGFjaXR5ID0gMC41LApjb2xvciA9ICJ3aGl0ZSIsc3Ryb2tlPUZBTFNFCiklPiUKYWRkTGVnZW5kKCBwYWw9bXlwYWxldHRlLCB2YWx1ZXM9fnByb3BlcnR5X3R5cGUsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJQcm9wZXJ0eSB0eXBlIiwgcG9zaXRpb24gPSAiYm90dG9tcmlnaHQiICkKbQptYXgobWFpbmRhdGEkbG9nX3ByaWNlKQpgYGAKCg==